
** Load the raw 2013 payroll records from the tax authority (SKAT)
use "$data\minwage_full_a700_2013_bin.dta", clear
destring korkod, replace

*** Correct or drop mis-filed payroll records
** According to SKAT, every correction takes the form of a record that negates
** the original (wrong) record, optionally followed by a new, corrected record.
**
** The variable correction is the grouping key for a correction chain:
**   - records with korkod == 0 are keyed by ibertvirkidkod,
**   - records with korkod == 1 or 2 are keyed by referidkod.
** NOTE(review): presumably referidkod holds the id of the record being
** corrected, so that an original and its corrections share a key - confirm.
**
** clonevar is used instead of generate because generate stores numeric results
** as float by default, which cannot represent integers above 16,777,216
** exactly; distinct record ids could then collapse into the same key.
** clonevar keeps the storage type of ibertvirkidkod (numeric or string).
clonevar correction = ibertvirkidkod if korkod == 0
replace correction = referidkod if korkod == 1 | korkod == 2

** Drop every group (person, firm, income year, filing period, key) in which
** the largest korkod is not 0, i.e. any group that contains a correction.
** Groups where korkod is missing throughout are dropped as well, because
** a missing maximum is also != 0.
bysort pnr senr indkaar angivpernr correction: egen delete = max(korkod)
drop if delete != 0
drop delete korkod ibertvirkidkod referidkod jobkod

** Month index: mofd() counts months since January 1960 and January 2008 is
** month 576, so subtracting 575 makes January 2008 equal to 1.
** NOTE(review): assumes angivpernr is a Stata daily date - confirm.
generate month = mofd(angivpernr) - 575

** Aggregate payments and hours to one row per individual, firm and month
** so that firm-level information can be merged on.
collapse (sum) felt_200 timeantF12, by(pnr senr month)

** Convert the firm id to numeric; with force, any value that cannot be
** read as a number becomes missing.
destring senr, replace force

** Attach firm information; keep all payroll rows, matched or not
merge m:1 senr using "$data\ES2015.dta", keepusing(es7604_dr_form_kod es7606_hov_br_nr) keep(master matched) nogenerate

** Collapse to one row per individual and month: monthly payments
** Rank each individual's payments within a month, largest first. senr is
** added as a tie-breaker: without it, equal payments from different firms
** are ordered arbitrarily and the firm that is kept can differ between runs.
gsort pnr month -felt_200 senr
** n exceeds 1 only for individuals with more than one payment in the month
** (these must come from different firms after the previous collapse)
by pnr month: generate n = _n

** Keep firm information from the highest paycheck of the month only: zero
** it out on all lower-ranked rows so the sums below return the top row's value.
foreach var of varlist senr es7604_dr_form_kod es7606_hov_br_nr {
	replace `var' = 0 if n > 1
}

** After the collapse n holds the number of payments in the month.
** NOTE(review): collapse (sum) treats missing as 0, so a top-ranked row with
** missing firm information (unmatched firm or non-numeric senr) ends up with
** 0 rather than missing in the firm variables.
collapse (max) n (sum) felt_200 timeantF12 senr es7604_dr_form_kod es7606_hov_br_nr ///
		, by(pnr month)

save "$data\minwage_data10_payroll_all_2013.dta", replace

** Population: every individual with age (alder) between 14 and 67 inclusive
use pnr alder foed_dag if inrange(alder,14, 67) using "E:\Data\rawdata\703788\bef2013.dta", clear

** Build an empty person-month skeleton with 12 rows per individual.
** Months use the same index as the payroll data: 2013 spans months 61 to 72.
local last_month  = (2013-2007)*12
local first_month = `last_month' - 11

** One placeholder column per month, used only to drive the reshape
forvalues m = `first_month'/`last_month' {
	generate t`m' = .
}

** Wide to long: the month number embedded in the column name becomes month
reshape long t, i(pnr) j(month)
drop t

** Birthday-related variables
** (The original referenced foed_dag followed by an undefined local macro t;
** it only worked because the macro expanded to nothing. The variable is now
** referenced directly.)
generate brthmnth = month(foed_dag)
generate yr18     = year(foed_dag) + 18
** Month index of the 18th birthday; baseline is 1 in January 2008 (legacy coding)
generate month18  = (yr18-2008)*12 + brthmnth
** Event time: months relative to the month of the 18th birthday
generate tline    = month - month18

** Merge payroll information onto the population skeleton; individuals
** without payroll in a month keep their row with missing payroll variables.
merge 1:1 pnr month using "$data\minwage_data10_payroll_all_2013.dta", keep(master matched) nogenerate

** Monthly earnings and monthly work hours: winsorized at p1 and p99 **
** Non-positive earnings are treated as missing
replace felt_200 = . if felt_200 <= 0

** Non-positive hours and hours above 1000 are treated as missing
replace timeantF12 = . if timeantF12 <= 0
replace timeantF12 = . if timeantF12 > 1000

** For each variable: compute the 1st and 99th percentile within each value
** of tline, then clamp a copy of the variable to that interval. Values are
** replaced by the bound, not dropped; missing values stay missing.
foreach v of varlist felt_200 timeantF12 {
	bysort tline: egen p1_`v' = pctile(`v'), p(1)
	bysort tline: egen p99_`v' = pctile(`v'), p(99)

	generate `v'_trim = `v'
	replace `v'_trim = p1_`v' if `v' < p1_`v'
	replace `v'_trim = p99_`v' if `v' > p99_`v' & `v' != .
}

** Employment indicator: 1 when positive earnings are observed in the month **
** (the second condition is needed because missing compares as larger than 0)
generate emplyd = felt_200_trim > 0 & felt_200 != .

** Calculated hourly wage: earnings divided by hours, winsorized at p1 and p99 **
** The raw ratio is kept alongside the winsorized version
generate calc_hrly_wage_raw = felt_200 / timeantF12
generate calc_hrly_wage = calc_hrly_wage_raw

** Percentiles are computed within each value of tline
bysort tline: egen p1_calc_hrly_wage = pctile(calc_hrly_wage), p(1)
bysort tline: egen p99_calc_hrly_wage = pctile(calc_hrly_wage), p(99)

** Clamp to the p1-p99 interval; missing wages stay missing
replace calc_hrly_wage = p1_calc_hrly_wage if calc_hrly_wage < p1_calc_hrly_wage
replace calc_hrly_wage = p99_calc_hrly_wage if calc_hrly_wage > p99_calc_hrly_wage & calc_hrly_wage != .

** The unwinsorized inputs are no longer needed
drop felt_200 timeantF12

** Merge occupation information (DISCO) from the income register
merge m:1 pnr using "E:\Data\rawdata\703788\indh2013.dta", keepusing(disco*) keep(match master) nogenerate

** Save dataset with 2013 payroll and occupation information
save "$data\minwage_data10_payroll_disco_age1467_2013.dta", replace

